##############################
##    functions to sample   ##
##         the cces         ##
##  assign to income groups ##
## to feed to the bootstrap ##
##############################

library(tidyverse)

# Keep, for every distinct question, only the rows from its most recent year.
#
# Arguments:
#   data            data frame with a `year` column and a question identifier
#                   column.
#   question_column name (string) of the question identifier column.
#
# Returns the retained rows bound together, grouped by question in order of
# first appearance. Questions observed in a single year are kept unchanged.
subset_unique_questions <- function(data, question_column = "question") {
  questions <- unique(data[[question_column]])

  # Collect the pieces positionally instead of indexing the list by the raw
  # question value: numeric ids would otherwise be used as list positions
  # (0 errors, fractional ids collide, large ids create sparse lists).
  pieces <- vector("list", length(questions))

  for (k in seq_along(questions)) {
    current <- questions[[k]]
    # `!!` injects the value so a data column that happens to share the name
    # of a local variable cannot mask it inside filter().
    rows <- data %>%
      filter(.data[[question_column]] == !!current)

    years <- unique(rows$year)
    if (length(years) > 1) {
      latest <- max(years)
      rows <- rows %>%
        filter(.data$year == !!latest)
    }
    pieces[[k]] <- rows
  }

  bind_rows(pieces)
}

# Draw one weighted bootstrap replicate of a survey data frame.
#
# Rows are resampled with replacement, nrow(cces) times, with selection
# probability proportional to `cces$weight`.
#
# Arguments:
#   cces  data frame; its `weight` column supplies the sampling weights.
#
# Returns a data frame with the same columns and the same number of rows as
# `cces`. A zero-row input is returned unchanged.
bootstrap_cces <- function(cces) {
  n_rows <- nrow(cces)

  # sample() cannot draw from an empty frame that carries a weight column
  # (it fails on the probability vector), so return early.
  if (n_rows == 0) {
    return(cces)
  }

  # For n_rows >= 1 this is exactly what sample(n_rows, ...) dispatches to,
  # so seeded results are unchanged.
  boot_rows <- sample.int(
    n_rows,
    size = n_rows,
    replace = TRUE,
    prob = cces$weight
  )

  # drop = FALSE keeps a data frame even when there is a single column.
  cces[boot_rows, , drop = FALSE]
}

# Flag bottom-, top- and middle-income rows with 0/1 indicator columns.
#
# Each class holds floor(nrow(cces) * quantile) rows:
#   * bottom: income levels are taken in ascending order of `faminc`; a level
#     that fits entirely is taken whole, and the level that would overshoot is
#     filled by a weighted draw (without replacement) of the remaining slots.
#   * top:    the same procedure, in descending order of `faminc`.
#   * middle: the class-size window centred on the sorted non-missing `faminc`
#     values determines how many rows to draw from each income level; the rows
#     within a level are picked by a weighted draw without replacement.
#
# Arguments:
#   cces      data frame with `faminc` and `weight` columns.
#   quantile  share of rows assigned to each class (default 0.1).
#
# Returns `cces` with numeric 0/1 columns `faminc_bottom`, `faminc_top` and
# `faminc_middle`; existing columns with these names are overwritten.
#
# Rows with a missing `faminc` are never flagged, but they still count towards
# nrow(cces) when the class size is computed.
draw_classes <- function(cces, quantile = 0.1) {
  income <- cces$faminc
  weights <- cces$weight
  class_size <- floor(nrow(cces) * quantile)

  # Weighted draw, without replacement, of `size` row indices out of `rows`.
  # Indexing through sample.int() avoids sample()'s special case that treats
  # a single number x as the range 1:x, which breaks one-member levels.
  pick_rows <- function(rows, size) {
    if (size <= 0 || length(rows) == 0) {
      return(integer(0))
    }
    rows[sample.int(length(rows), size, replace = FALSE, prob = weights[rows])]
  }

  # Walk the income levels in the given order and collect row indices until
  # `class_size` rows have been gathered.
  fill_from <- function(ordered_levels) {
    chosen <- integer(0)
    remaining <- class_size
    for (level in ordered_levels) {
      if (remaining <= 0) {
        break
      }
      members <- which(income == level)
      if (length(members) > remaining) {
        members <- pick_rows(members, remaining)
      }
      chosen <- c(chosen, members)
      remaining <- remaining - length(members)
    }
    chosen
  }

  # 0/1 indicator over all rows; safe when `rows` is empty.
  as_flag <- function(rows) {
    flags <- numeric(nrow(cces))
    flags[rows] <- 1
    flags
  }

  income_levels <- sort(unique(income))

  # Bottom is drawn before top, and top before middle, so the random draws
  # happen in a fixed order.
  bottom <- fill_from(income_levels)
  top <- fill_from(sort(income_levels, decreasing = TRUE))

  # Middle: exactly `mid_size` consecutive ranks centred on the median rank.
  # (A lo:hi range built from two floor() calls spans one rank too many.)
  ranked <- sort(income)
  mid_size <- max(0, min(class_size, length(ranked)))
  offset <- floor((length(ranked) - mid_size) / 2)
  window <- ranked[offset + seq_len(mid_size)]

  middle <- integer(0)
  for (level in unique(window)) {
    n_from_level <- sum(window == level)
    middle <- c(middle, pick_rows(which(income == level), n_from_level))
  }

  cces$faminc_bottom <- as_flag(bottom)
  cces$faminc_top <- as_flag(top)
  cces$faminc_middle <- as_flag(middle)

  cces
}

# Build one bootstrap replicate of the long-format CCES, year by year.
#
# For every year the data are spread to one column per question, resampled
# with bootstrap_cces(), given income-class flags by draw_classes(), and
# gathered back to long format (`question` / `opinion`).
#
# Arguments:
#   full_cces  long-format data frame; the columns `pap_topic`, `topic_3` and
#              `topic_6` are dropped before reshaping.
#   quantile   class share passed on to draw_classes().
#
# Returns the per-year replicates bound into a single long data frame.
boot_sample_cces <- function(full_cces, quantile = 0.1) {
  years <- unique(full_cces$year)
  # Rows with a missing year can never match `year == y`; iterating over NA
  # would only hand an empty frame to the resampling step.
  years <- years[!is.na(years)]

  boots_by_year <- list()

  for (y in years) {
    # `!!y` injects the loop value so a column named `y` cannot mask it.
    cces_y <- full_cces %>%
      select(-pap_topic, -topic_3, -topic_6) %>%
      filter(year == !!y) %>%
      spread(key = question, value = opinion)

    # The helpers take and return plain data frames, so they are piped
    # directly rather than through the superseded do() wrapper.
    boots_by_year[[as.character(y)]] <- cces_y %>%
      bootstrap_cces() %>%
      draw_classes(quantile) %>%
      gather(
        key = question, value = opinion,
        -uniqueID, -weight, -year, -race, -race_new, -faminc, -pid3, -pid7,
        -educ, -ideo5, -faminc_bottom, -faminc_top, -faminc_middle, -voter,
        -female, -bornagain, -urbanrural, -agegrp, -church,
        -starts_with("state")
      )
  }

  bind_rows(boots_by_year)
}

# Build one bootstrap replicate of the long-format CCES within party.
#
# For every year and for each of the parties "Democrat" and "Republican"
# (matched on `pid3`; other values are left out), the subset is spread to one
# column per question, resampled with bootstrap_cces(), given income-class
# flags by draw_classes(), and gathered back to long format.
#
# NOTE: both the resample and the class assignment are done on the
# year-by-party subset, so income classes are relative to the party.
#
# Arguments:
#   full_cces  long-format data frame; the columns `pap_topic`, `topic_3` and
#              `topic_6` are dropped before reshaping.
#   quantile   class share passed on to draw_classes().
#
# Returns the per-year, per-party replicates bound into one long data frame.
boot_sample_cces_within_party <- function(full_cces, quantile = 0.1) {
  parties <- c("Democrat", "Republican")
  years <- unique(full_cces$year)
  # Rows with a missing year can never match `year == y`.
  years <- years[!is.na(years)]

  boots_by_stratum <- list()

  for (y in years) {
    for (p in parties) {
      # `!!` injects the loop values so columns named `y` or `p` cannot mask
      # them inside filter().
      stratum <- full_cces %>%
        select(-pap_topic, -topic_3, -topic_6) %>%
        filter(year == !!y, pid3 == !!p)

      # A year without respondents for this party has nothing to resample;
      # skip it instead of failing inside the bootstrap draw.
      if (nrow(stratum) == 0) {
        next
      }

      cces_y <- stratum %>%
        spread(key = question, value = opinion)

      # The helpers take and return plain data frames, so they are piped
      # directly rather than through the superseded do() wrapper.
      boots_by_stratum[[paste(y, p)]] <- cces_y %>%
        bootstrap_cces() %>%
        draw_classes(quantile) %>%
        gather(
          key = question, value = opinion,
          -uniqueID, -weight, -year, -race, -race_new, -faminc, -pid3, -pid7,
          -educ, -ideo5, -faminc_bottom, -faminc_top, -faminc_middle, -voter,
          -female, -bornagain, -urbanrural, -agegrp, -church,
          -starts_with("state")
        )
    }
  }

  bind_rows(boots_by_stratum)
}

